PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Home_Kitchen\Cluster Analysis\price_changes_sellertype_July2025.csv"
OUT=price_changes_sellertype
DBMS=csv REPLACE;
run;
PROC IMPORT 
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Home_Kitchen\Cluster Analysis\Sellerclusters_cooker_July2025.csv"
out = SELLER_TYPE_TREND1
DBMS=csv REPLACE;
run;
PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Home_Kitchen\Cluster Analysis\pricechangetrend_outcome_Jul2025.csv" 
out=final_outputvector
DBMS=csv REPLACE;
run;



/***subsetting for the relevant analysis SKUs***/

data relevantSKUsonly;
set price_changes_sellertype;
where ASIN in ('B013I40R8E', 'B06VV9M11N', 'B0051O92U4','B00FLYWNYQ','B003HF6PUO') ; 
run;

data relevantSKUs;
set price_changes_sellertype;
where ASIN in ('B013I40R8E', 'B06VV9M11N', 'B0051O92U4','B00FLYWNYQ','B003HF6PUO') and seller_type in ('Amazon New', 'Cluster 1', 'Cluster 2',
'Cluster 3', 'Cluster 4', 'Cluster 5'); 
run;


data relevantSKUs_used;
set price_changes_sellertype;
where ASIN in ('B013I40R8E', 'B06VV9M11N', 'B0051O92U4','B00FLYWNYQ','B003HF6PUO') and seller_type not in ('Amazon New', 'Cluster 1', 'Cluster 2',
'Cluster 3', 'Cluster 4', 'Cluster 5'); 
run;

PROC SQL;
CREATE TABLE numsellers_newoffers AS
SELECT
TIME1,
ASIN,
COUNT(DISTINCT SELLER_ID) AS NUM_SELLERS
from 
relevantSKUs
group by
time1,
ASIN;
quit;

PROC SQL;
CREATE TABLE numsellersbyeachtype_newoffers AS
SELECT
TIME1,
ASIN,
seller_type,
COUNT(DISTINCT SELLER_ID) AS NUM_SELLERS
from 
relevantSKUsonly
group by
time1,
ASIN,
seller_type;
quit;

/**creating num sellers DV**/
proc sort data = numsellers_newoffers (keep=time1) nodupkey out= final_numsellers_outputvector;
by time1;
run;

%macro numsellers(b,c);
data numsellers&b. (keep = time1 NUM_SELLERS_&b.);
set numsellers_newoffers;
where ASIN = &c.;
rename NUM_SELLERS = NUM_SELLERS_&b. ;
run;

data final_numsellers_outputvector ;
merge final_numsellers_outputvector (in=a) numsellers&b. (in=b);
by time1;
if a ;
if a and not b then NUM_SELLERS_&b. = 0;
run;
%mend;
%numsellers(b = Breville, c = 'B013I40R8E'); 
%numsellers(b = Cosori, c = 'B06VV9M11N'); 
%numsellers(b = EP, c = 'B0051O92U4'); 
%numsellers(b = IP, c = 'B00FLYWNYQ'); 
%numsellers(b = CrockP, c = 'B003HF6PUO'); 



/***covariate creation**/
/*proc contents data = SELLER_TYPE_TREND;*/
/*run;*/
/**/
proc sort data = SELLER_TYPE_TREND1 out = SELLER_TYPE_TREND11;
by time1;
run;
data SELLER_TYPE_TREND2;
set SELLER_TYPE_TREND11;
if seller_type in ('3P Old Focal', '3P Old Non-Focal') then seller_type = '3P Old';
else if seller_type in ('Amazon Old Focal', 'Amazon Old Non-Focal') then seller_type = 'Amazon Old';
else seller_type = seller_type;
run;

/**seller characteristics trend***/
proc freq data = SELLER_TYPE_TREND2;
tables seller_type;
run;

/*subset for 3p new sellers*/
data SELLER_TYPE_TREND3;
set SELLER_TYPE_TREND2;
where seller_type not in ('Amazon Ne', '3P Old Fo', '3P Old No', 'Amazon Ol');
run;

PROC SQL;
CREATE TABLE SELLER_charcs_byasin AS
SELECT
TIME1,
ASIN,
SELLER_TYPE,
mean(FBA) as mean_FBA,
/*std(FBA) as sd_FBA,*/
mean(Freeship) as mean_freeship,
/*std(Freeship) as sd_freeship,*/
mean(Prime) as mean_prime,
/*std(Prime) as sd_prime,*/
sum(condition) as new_offers,
mean(num_seller_Posrating) as mean_posrating,
/*std(num_seller_Posrating) as sd_posrating,*/
mean(num_seller_ratings) as mean_numratings,
/*std(num_seller_ratings) as sd_numratings,*/
mean(seller_rate) as mean_starrate
/*std(seller_rate) as sd_starrate*/
from 
SELLER_TYPE_TREND3
group by
time1,
ASIN,
seller_type;
quit;

proc sort data = SELLER_charcs_byasin (keep=time1) nodupkey out= SELLER_charcs_byasin2;
by time1;
run;


%macro sellerchar(b,c);
data SELLER_charcs_byasin&b. (keep = time1 mean_FBA&b. 
mean_freeship&b. mean_numratings&b.   
mean_posrating&b. 
mean_prime&b.   
mean_starrate&b.
/*sd_FBA&b.*/
/*sd_freeship&b. */
/*sd_numratings&b.    */
/*sd_posrating&b.    */
/*sd_prime&b.  */
/*sd_starrate&b.*/
new_offers&b.);
set SELLER_charcs_byasin;
where ASIN = &c. ;
rename mean_FBA = mean_FBA&b.;
rename mean_freeship = mean_freeship&b.;  
rename mean_numratings = mean_numratings&b.;    
rename mean_posrating =  mean_posrating&b.;    
rename mean_prime =  mean_prime&b.;    
rename mean_starrate = mean_starrate&b.;
/*rename sd_FBA = sd_FBA&b.;*/
/*rename sd_freeship =  sd_freeship&b.;  */
/*rename sd_numratings = sd_numratings&b.;     */
/*rename sd_posrating = sd_posrating&b.;     */
/*rename sd_prime =  sd_prime&b.;   */
/*rename sd_starrate = sd_starrate&b.;*/
rename new_offers = new_offers&b.;
run;

data SELLER_charcs_byasin2 ;
merge SELLER_charcs_byasin2 (in=a) SELLER_charcs_byasin&b. (in=b);
by time1;
if a ;
if a and not b then do;
mean_FBA&b. = 0;
mean_freeship&b. = 0;
mean_posrating&b. = 0;
mean_prime&b. =0;
mean_starrate&b. = 0;
/*sd_FBA&b.=0;*/
/*sd_freeship&b.=0;*/
/*sd_numratings&b.=0;*/
/*sd_posrating&b.=0;*/
/*sd_prime&b.=0;*/
/*sd_starrate&b.=0;*/
/*new_offers&b.=0;*/
end;
run;
%mend;
%sellerchar(b = BR_3p, c = 'B013I40R8E'); 
%sellerchar(b = CR_3p, c = 'B06VV9M11N');  
%sellerchar(b = EP_3p, c = 'B0051O92U4'); 
%sellerchar(b = IP_3p, c = 'B00FLYWNYQ'); 
%sellerchar(b = CP_3p, c = 'B003HF6PUO'); 


/**coding all missings with 0 values**/

proc contents data = SELLER_charcs_byasin2;
run;

data seller_characs_byasin3;
set SELLER_charcs_byasin2;

if 	mean_FBABR_3p	 = .	then 	mean_FBABR_3p	= 0;
if 	mean_FBACP_3p	 = .	then 	mean_FBACP_3p	= 0;
if 	mean_FBACR_3p	 = .	then 	mean_FBACR_3p	= 0;
if 	mean_FBAEP_3p	 = .	then 	mean_FBAEP_3p	= 0;
if 	mean_FBAIP_3p	 = .	then 	mean_FBAIP_3p	= 0;
if 	mean_freeshipBR_3p	 = .	then 	mean_freeshipBR_3p	= 0;
if 	mean_freeshipCP_3p	 = .	then 	mean_freeshipCP_3p	= 0;
if 	mean_freeshipCR_3p	 = .	then 	mean_freeshipCR_3p	= 0;
if 	mean_freeshipEP_3p	 = .	then 	mean_freeshipEP_3p	= 0;
if 	mean_freeshipIP_3p	 = .	then 	mean_freeshipIP_3p	= 0;
if 	mean_numratingsBR_3p	 = .	then 	mean_numratingsBR_3p	= 0;
if 	mean_numratingsCP_3p	 = .	then 	mean_numratingsCP_3p	= 0;
if 	mean_numratingsCR_3p	 = .	then 	mean_numratingsCR_3p	= 0;
if 	mean_numratingsEP_3p	 = .	then 	mean_numratingsEP_3p	= 0;
if 	mean_numratingsIP_3p	 = .	then 	mean_numratingsIP_3p	= 0;
if 	mean_posratingBR_3p	 = .	then 	mean_posratingBR_3p	= 0;
if 	mean_posratingCP_3p	 = .	then 	mean_posratingCP_3p	= 0;
if 	mean_posratingCR_3p	 = .	then 	mean_posratingCR_3p	= 0;
if 	mean_posratingEP_3p	 = .	then 	mean_posratingEP_3p	= 0;
if 	mean_posratingIP_3p	 = .	then 	mean_posratingIP_3p	= 0;
if 	mean_primeBR_3p	 = .	then 	mean_primeBR_3p	= 0;
if 	mean_primeCP_3p	 = .	then 	mean_primeCP_3p	= 0;
if 	mean_primeCR_3p	 = .	then 	mean_primeCR_3p	= 0;
if 	mean_primeEP_3p	 = .	then 	mean_primeEP_3p	= 0;
if 	mean_primeIP_3p	 = .	then 	mean_primeIP_3p	= 0;
if 	mean_starrateBR_3p	 = .	then 	mean_starrateBR_3p	= 0;
if 	mean_starrateCP_3p	 = .	then 	mean_starrateCP_3p	= 0;
if 	mean_starrateCR_3p	 = .	then 	mean_starrateCR_3p	= 0;
if 	mean_starrateEP_3p	 = .	then 	mean_starrateEP_3p	= 0;
if 	mean_starrateIP_3p	 = .	then 	mean_starrateIP_3p	= 0;
if 	new_offersBR_3p	 = .	then 	new_offersBR_3p	= 0;
if 	new_offersCP_3p	 = .	then 	new_offersCP_3p	= 0;
if 	new_offersCR_3p	 = .	then 	new_offersCR_3p	= 0;
if 	new_offersEP_3p	 = .	then 	new_offersEP_3p	= 0;
if 	new_offersIP_3p	 = .	then 	new_offersIP_3p	= 0;

run;

proc sort data = seller_characs_byasin3 nodupkey out=seller_characs_byasin4 ;
by  TIME1 ;
run;

proc expand data=seller_characs_byasin4 out=seller_characs_byasin5 method = none; 
convert	mean_FBABR_3p	=	mean_FBABR_3p_lag		/transformout = (lag 1);
convert	mean_FBACP_3p	=	mean_FBACP_3p_lag		/transformout = (lag 1);
convert	mean_FBACR_3p	=	mean_FBACR_3p_lag		/transformout = (lag 1);
convert	mean_FBAEP_3p	=	mean_FBAEP_3p_lag		/transformout = (lag 1);
convert	mean_FBAIP_3p	=	mean_FBAIP_3p_lag		/transformout = (lag 1);
convert	mean_freeshipBR_3p	=	mean_freeshipBR_3p_lag		/transformout = (lag 1);
convert	mean_freeshipCP_3p	=	mean_freeshipCP_3p_lag		/transformout = (lag 1);
convert	mean_freeshipCR_3p	=	mean_freeshipCR_3p_lag		/transformout = (lag 1);
convert	mean_freeshipEP_3p	=	mean_freeshipEP_3p_lag		/transformout = (lag 1);
convert	mean_freeshipIP_3p	=	mean_freeshipIP_3p_lag		/transformout = (lag 1);
convert	mean_numratingsBR_3p	=	mean_numratingsBR_3p_lag		/transformout = (lag 1);
convert	mean_numratingsCP_3p	=	mean_numratingsCP_3p_lag		/transformout = (lag 1);
convert	mean_numratingsCR_3p	=	mean_numratingsCR_3p_lag		/transformout = (lag 1);
convert	mean_numratingsEP_3p	=	mean_numratingsEP_3p_lag		/transformout = (lag 1);
convert	mean_numratingsIP_3p	=	mean_numratingsIP_3p_lag		/transformout = (lag 1);
convert	mean_posratingBR_3p	=	mean_posratingBR_3p_lag		/transformout = (lag 1);
convert	mean_posratingCP_3p	=	mean_posratingCP_3p_lag		/transformout = (lag 1);
convert	mean_posratingCR_3p	=	mean_posratingCR_3p_lag		/transformout = (lag 1);
convert	mean_posratingEP_3p	=	mean_posratingEP_3p_lag		/transformout = (lag 1);
convert	mean_posratingIP_3p	=	mean_posratingIP_3p_lag		/transformout = (lag 1);
convert	mean_primeBR_3p	=	mean_primeBR_3p_lag		/transformout = (lag 1);
convert	mean_primeCP_3p	=	mean_primeCP_3p_lag		/transformout = (lag 1);
convert	mean_primeCR_3p	=	mean_primeCR_3p_lag		/transformout = (lag 1);
convert	mean_primeEP_3p	=	mean_primeEP_3p_lag		/transformout = (lag 1);
convert	mean_primeIP_3p	=	mean_primeIP_3p_lag		/transformout = (lag 1);
convert	mean_starrateBR_3p	=	mean_starrateBR_3p_lag		/transformout = (lag 1);
convert	mean_starrateCP_3p	=	mean_starrateCP_3p_lag		/transformout = (lag 1);
convert	mean_starrateCR_3p	=	mean_starrateCR_3p_lag		/transformout = (lag 1);
convert	mean_starrateEP_3p	=	mean_starrateEP_3p_lag		/transformout = (lag 1);
convert	mean_starrateIP_3p	=	mean_starrateIP_3p_lag		/transformout = (lag 1);
convert	new_offersBR_3p	=	new_offersBR_3p_lag		/transformout = (lag 1);
convert	new_offersCP_3p	=	new_offersCP_3p_lag		/transformout = (lag 1);
convert	new_offersCR_3p	=	new_offersCR_3p_lag		/transformout = (lag 1);
convert	new_offersEP_3p	=	new_offersEP_3p_lag		/transformout = (lag 1);
convert	new_offersIP_3p	=	new_offersIP_3p_lag		/transformout = (lag 1);

run;

/*****REMAINING COVARIATES ARE FROM THE ORIGINAL DV - FINAL DATA SET*****/
/**this is the revised data set after round 4 -- includes the lagged external sites**/
/*PROC IMPORT*/
/*DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Home_Kitchen\Cluster Analysis\gam_modeldata_July_2025_JMRnd4.csv" */
/*out=final_gam_modeldata*/
/*DBMS=csv REPLACE;*/
/*run;*/

data final_gam_modeldata;
set inn.final_gam_modeldata;
run;
proc contents data = final_gam_modeldata;
run;

proc sort data = final_numsellers_outputvector out = sorted_finalnumsellersoutput;
by time1;
run;

proc sort data = final_gam_modeldata out = sorted_final_GAMMODELDATA;
by time1;
run;


proc sort data = seller_characs_byasin5 out = sorted_seller_characs_byasin5;
by time1;
run;


data finalnumsellers_modeldata;
merge sorted_finalnumsellersoutput (in=a) sorted_final_GAMMODELDATA (in=b) sorted_seller_characs_byasin5 (in=c);
by time1;
if a ;
run;
proc contents data = finalnumsellers_modeldata;
run;

/**Adding special Amazon holidays***/

/*Valentine's day sale: Feb 5th - Feb 14th 

Easter day sale: March 19-March 26 (2018)

Mothers day: May 13 (2018)

Amazon prime day: JUly 16 (2018); post-prime day sale July 18 (2018)

Black Friday:
    Nov 17th - Nov 24th (2017)
    Nov 16th - Nov 23rd (2018)

Cyber Monday:
	Nov 27th (2017), Nov 26th (2018)

Amazon digital day: Dec 29 (2017), Dec 28 (2018)

12 days of deal sale: Dec 3rd - Dec 14 (2017), Dec 2nd - Dec 13 (2018)

Christmas day sale: Dec 20-Dec25

Hanukkah sale: Dec 1- Dec 7

Post Christmas sale: Dec 26- Jan 1st*/


data finalnumsellers_modeldata1;
set finalnumsellers_modeldata;
if time1 in ( '01Jan2018'd , 
              '05Feb2018'd, '06Feb2018'd,'07Feb2018'd,'08Feb2018'd,'09Feb2018'd,'10Feb2018'd,'11Feb2018'd,
              '12Feb2018'd, '13Feb2018'd,'14Feb2018'd,
			  '19Mar2018'd, '20Mar2018'd,'21Mar2018'd,'22Mar2018'd,'23Mar2018'd,'24Mar2018'd,'25Mar2018'd,'26Mar2018'd,
			  '13May2018'd, '16Jul2018'd, '18Jul2018'd, 
              '17Nov2017'd,'18Nov2017'd,'19Nov2017'd,'20Nov2017'd,'21Nov2017'd,'22Nov2017'd, '23Nov2017'd,'24Nov2017'd,'27Nov2017'd,
			  '16Nov2018'd,'17Nov2018'd,'18Nov2018'd,'19Nov2018'd,'20Nov2018'd,'21Nov2018'd,'22Nov2018'd, '23Nov2018'd,'26Nov2018'd,
              '03Dec2017'd,'04Dec2017'd,'05Dec2017'd,'06Dec2017'd,'07Dec2017'd,'08Dec2017'd,
              '09Dec2017'd,'10Dec2017'd,'11Dec2017'd,'12Dec2017'd,'13Dec2017'd,'14Dec2017'd,
			  '02Dec2018'd,'03Dec2018'd,'04Dec2018'd,'05Dec2018'd,'06Dec2018'd,'07Dec2018'd,'08Dec2018'd,
              '09Dec2018'd,'10Dec2018'd,'11Dec2018'd,'12Dec2018'd,'13Dec2018'd) then seasonal_sale = 1;
else if time1 >= '20Dec2017'd and time1 <= '31Dec2017'd then seasonal_sale = 1;
else if time1 >= '20Dec2018'd and time1 <= '31Dec2018'd then seasonal_sale = 1;
else seasonal_sale = 0;
run;

proc print data = finalnumsellers_modeldata1 (obs=200);
var priceclust4_Breville priceclust3_crockp;
run;
proc contents data = finalnumsellers_modeldata1;
run;

proc freq data = finalnumsellers_modeldata1;
tables seasonal_sale ;
run;
proc print data = finalnumsellers_modeldata1 (obs=20);
var nonbbox3pprice_brev_lag ;
run;

/*PROC EXPORT */
/*DATA=finalnumsellers_modeldata1*/
/*DBMS=csv*/
/*OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Home_Kitchen\Cluster Analysis\finalnumsellers_dataJuly2025rnd4.csv"*/
/*REPLACE;*/
/*PUTNAMES=YES;*/
/*run;*/


PROC EXPORT 
DATA=finalnumsellers_modeldata1
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Home_Kitchen\Cluster Analysis\finalnumsellers_dataJuly2025rnd4_all.csv"
REPLACE;
PUTNAMES=YES;
run;
/**/
/*PROC IMPORT*/
/*DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Home_Kitchen\Cluster Analysis\finalnumsellers_modeldata1_rnd4.csv" */
/*out=finalnumsellers_modeldata1*/
/*DBMS=csv REPLACE;*/
/*run;*/
/*proc contents data = finalnumsellers_modeldata1;*/
/*run;*/
/**/
/*proc print data = finalnumsellers_modeldata1 (obs=50);*/
/*var new_offersBR_clust4_lag1 num_brvclust4_lag1 pricechangesbrvamzn_lag1;*/
/*run;*/
